import requests
import re
from pymongo import MongoClient


# Scrape one day of the newrank.cn Weibo ranking page and store each ranked
# account as a document in a local MongoDB collection.

# Connection to the local MongoDB instance; one collection per ranking date.
client = MongoClient('mongodb://localhost:27017/')
db = client['xin_wb']
collection = db['2023.11.5']


url = 'https://www.newrank.cn/ranklist/weibo/1/1/2023-11-05'

# Browser-like User-Agent sent with the request.
hed = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.46',
}

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops us from parsing an HTTP error page as ranking data.
res = requests.get(url, headers=hed, timeout=10)
res.raise_for_status()


html_content = res.text
# Raw JSON payload(s) embedded in the page's __NEXT_DATA__ script tag.
# NOTE(review): not used by the extraction below, which scans the whole page.
script_tags = re.findall(r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', html_content, re.DOTALL)


def _find_field(field):
    """Return every string value of the JSON key *field* found in the page.

    Only values written as quoted strings (``"key":"value"``) are matched.
    """
    pattern = r'\"' + field + r'\":\"(.*?)\"'
    return re.findall(pattern, html_content, re.DOTALL)


name = _find_field('name')
# Number of likes
like_count = _find_field('like_count')
# Rank position
rank_position = _find_field('rank_position')
# Number of comments
comments_count = _find_field('comments_count')
# Newrank index
newrank_index = _find_field('newrank_index')
# Number of reposts
reposts_count = _find_field('reposts_count')
# Number of followers
followers_count = _find_field('followers_count')

columns = (
    rank_position,
    name,
    like_count,
    comments_count,
    newrank_index,
    reposts_count,
    followers_count,
)

# The columns are extracted independently, so their lengths can differ.
# zip() stops at the shortest one instead of raising IndexError part-way
# through the inserts; make the truncation visible rather than silent.
if len({len(column) for column in columns}) > 1:
    print(
        'warning: extracted field counts differ:',
        [len(column) for column in columns],
    )

documents = [
    {
        '排名': rank,
        "name": account_name,
        "点赞": likes,
        "评论": comments,
        "新榜指数": index,
        "转发": reposts,
        "粉丝": followers,
    }
    for rank, account_name, likes, comments, index, reposts, followers
    in zip(*columns)
]

# insert_many rejects an empty list, so only write when something was found.
if documents:
    collection.insert_many(documents)


























